* Start from an empty session: drop the data in memory, then stored matrices,
* then Mata objects.
clear
clear matrix
clear mata
* Close a log file left open by an earlier run; -cap- swallows the error
* raised when no log is open.
cap log close
* NOTE(review): legacy syntax; presumably equivalent to -set more off-
* (no --more-- pauses in the output) -- confirm.
set more 1
* Allow up to 10,000 variables in memory.
set maxvar 10000


***********************************************************************************************************
** Setup files: Huber, Schelker, Strittmatter, “Direct and Indirect Effects based on Changes-in-Changes” **
***********************************************************************************************************


* Root data directory (placeholder to be replaced by the local path). Later
* file names are appended directly (e.g. "${data}JobsII\..."), so the value
* must end with a path separator.
global data "...\"


*******************************************
**** Application I: Jobs II Evaluation ****
*******************************************


** Screening-period file: keep the person identifier (v1) together with the
** month/day/year components of date of birth, date of data entry and date
** of experimental treatment.
use "${data}JobsII\02739-0001-data.dta", clear
keep v1 v11 v12 v13 v14 v15 v16 v17 v18 v19

* Mnemonic names: prefix m/d/y (month/day/year) + birth, data (entry), treat.
rename (v11    v12    v13    v14   v15   v16   v17    v18    v19   ) ///
       (mbirth dbirth ybirth mdata ddata ydata mtreat dtreat ytreat)

* Store sorted on v1, the key on which this file is merged back in later.
sort v1
save "${data}JobsII\jobsII_date.dta", replace


** Main data from the total dataset ("part 12"): write one file per
** measurement occasion and stack them into a long panel.
use "${data}JobsII\02739-0012-data.dta", clear

* Variables carried into every per-occasion file.
local common cond01 v0518 v1 v9001 v9103 v9103a v9105 v9106 v9107 v1407

keep `common' v1002 v1518 v2002 v2518 v3002 v3518

* Occasion file 0 (time = -1): only the common variables; v0518 becomes v518.
preserve
keep `common'
rename v0518 v518
gen time = -1
save "${data}JobsII\jobsII_0.dta", replace
restore

* Occasion files 1-3 (time = 0, 1, 2): common variables plus that occasion's
* vK002 and vK518, renamed to the occasion-free names v002 and v518.
forvalues k = 1/3 {
	preserve
	keep `common' v`k'002 v`k'518
	rename (v`k'002 v`k'518) (v002 v518)
	gen time = `k' - 1
	save "${data}JobsII\jobsII_`k'.dta", replace
	restore
}

* Stack the four occasion files in order 0, 1, 2, 3.
use "${data}JobsII\jobsII_0.dta", clear
forvalues k = 1/3 {
	append using "${data}JobsII\jobsII_`k'.dta"
}

* Attach the screening-period dates (one row per v1) to every person-occasion
* row, show the match result, then discard the merge indicator.
merge m:1 v1 using "${data}JobsII\jobsII_date.dta" // add dates
tab _merge
drop _merge


** Variable definitions
gen treatment = cond01 
label var treatment "treatment group (incl. high & low depression risk)"

* highdep: 0 when v9105 == 1, 1 when v9105 == 2; rows with v9105 == 3 are
* removed from the sample altogether.
gen highdep = 0 if v9105 == 1
replace highdep = 1 if v9105 == 2
label var highdep "high depression risk"
drop if v9105 == 3 // very high depression ("cases")

gen risk1 = v9103
label var risk1 "risk score - continuous"
gen risk2 = v9103a
* Fix: this label was previously applied to risk1 again, which overwrote the
* risk1 label above and left risk2 without a label.
label var risk2 "risk score - continuous, fill from paradox"

* mediator starts as v9107 and is forced to 0 for everyone in the control arm.
gen mediator = v9107 
replace mediator = 0 if treatment == 0
label var mediator "Attendance: control & no-shows (0), attend (1)"

rename	v518 	depress
label var depress "depression score"

rename v1 pid
rename v9001 sex

* NOTE(review): the occasion files built earlier in this script only assign
* time = -1, 0, 1, 2; no rows carry the value 3 mentioned in this label -- confirm.
label var time "-1=eval; 0=pre-treatment; 1=6weeks; 2=6month; 3=2years"

order pid time treatment risk* highdep mediator depress
sum treatment risk* highdep mediator  depress
tab depress time

* Drop the remaining raw v* variables and cond01 (both superseded by the
* named variables above); the renamed date variables are kept.
drop v* cond01

sort pid time
save "${data}JobsII\HSS_jobsII.dta", replace

* Remove the intermediate files written while assembling the panel:
* the date file first, then occasion files 0 through 3.
foreach part in date 0 1 2 3 {
	erase "${data}JobsII\jobsII_`part'.dta"
}

*** Summary statistics ***

* Cell counts by occasion and treatment arm, restricted to rows with a
* non-missing depression score.
tabulate time treatment if depress != .

** Table 2: two-sided t-test and standardized difference of the depression
** score between treatment arms, first at time==-1 (randomization), then at
** time==2.
** NOTE(review): the standardized difference is conventionally
** SD = (meanT - meanC) / sqrt(0.5*(varT + varC)), i.e. it pools variances,
** not standard deviations -- confirm against the -stddiff- help file.
foreach t in -1 2 {
	ttest depress if time==`t', by(treatment)
	stddiff depress if time==`t', by(treatment)
}

** Attrition
// T=-1 -- > T=2 -- > Attrition
* Compare the time==-1 depression score across treatment arms within the
* subsample whose depression score is also observed at time==2. Runs inside
* preserve/restore so the full panel is left untouched.
preserve
keep if time==-1 | time==2
drop if depress==. & time==-1

* Order rows by time within person as part of the by-prefix itself, so that
* [_n+1] on a time==-1 row is guaranteed to be that person's time==2 row.
* A plain "bysort pid:" only guarantees ordering on pid and would depend on
* whatever within-person order an earlier sort happened to leave behind.
bysort pid (time): gen leaddep = depress[_n+1]

* ind1 = 1 when the following within-person score is not the system missing
* value, 0 otherwise (same rule as the former two-step gen/replace).
gen ind1 = (leaddep != .)

stddiff depress if time==-1 & ind1==1, by(treatment)  // subsample of those not missing in T==2
ttest depress if time==-1 & ind1==1, by(treatment)
restore

